*This code re-codes the comune level data and merges it with the Meetup info 
/*
Procedure: 
The data comes in wide format. Thus:
1) We code the pre-treatment covariates for all respondents 
2) We keep only the pre-treatment covariates and the DVs 
3) We re-shape the file into long format dropping pre-treatment covariates into single variables, 
while all DVs remain 2 variables (pre post). 
4) We can then subtract pre post Dvs such that we can estimate the change in DiD designs. 
*/

* directory definitions 

	* Query -project- for this do-file's context; the project directory and
	* the do-file stub name are read from r() below.
	project, doinfo
	global pdir "`r(pdir)'"			// the project's main dir.
	global dofile "`r(dofile)'"		// do-file's stub name

	* Locations derived from the project directory:
	global data_original "$pdir/data_original"	// directory with the original input data
	global data_coded "$pdir/data_coded"		// directory for coded data
	global figures "$pdir/results/figures"		// directory for figures
	global tables "$pdir/results/tables"		// directory for tables


*********************************************
******comune level election results**********
*********************************************
/*
Here we open the election data file:
This data contains much more information than we need, 
e.g. also prior election results. 

Since we are only interested in the changes between 
2018 & 2013 we only keep this information and delete
the remaining year:
*/
	* Register the election file as a project input and load it.
	project, original("$data_original/elections/elections.dta")
	use "$data_original/elections/elections.dta", clear 

* keep only years we need: 

	keep if year==2013 | year==2018

* do some cleaning of data: 	
	rename istatcode comune_id

	* Fill gaps in regione/provincia within each comune, in both directions.
	* The order of the years inside each comune is stated explicitly in the
	* by-prefix: a bare -bysort comune_id:- only guarantees the grouping, so
	* which observation's value gets carried would otherwise depend on an
	* implicit sort order.
	tempvar negyear
	gen `negyear' = -year

	* pass 1: earlier year -> later year
	bysort comune_id (year): carryforward regione, replace 
	bysort comune_id (year): carryforward provincia, replace 

	* pass 2: later year -> earlier year (data end up sorted by comune_id
	* and descending year, as before)
	bysort comune_id (`negyear'): carryforward regione, replace 
	bysort comune_id (`negyear'): carryforward provincia, replace 
	drop `negyear'

	* Shorten the party prefixes; whatever follows the underscore is kept.
	rename FORZAITALIA_* forza_*
	rename LEGA_* lega_*
	rename MOVIMENTO5STELLE_* m5stelle_*
	rename PARTITODEMOCRATICO_* pd_* 
	rename ILPOPOLODELLALIBERTA_* pdl_*

	* Some parties changed their names but are the same. For both suffixes
	* (C and S) copy the 2013 values stored under the old party name, and
	* use the VALLEEDAOSTE columns as the PD value in the province of Aosta.
	foreach ch in C S {
		replace m5stelle_`ch' = MOVIMENTO5STELLEBEPPEGRILLOI_`ch' if year==2013
		replace lega_`ch' = LEGANORD_`ch' if year==2013
		replace pd_`ch' = VALLEEDAOSTE_`ch' if provincia=="Aosta"
	}

	* Turnout in percent: Numerovotanti relative to Numeroelettori.
	generate turnout = (Numerovotanti/Numeroelettori)*100

	* Party results in percent; note the denominator is Numeroelettori.
	foreach party of varlist forza_* lega_* m5stelle_* pd_* pdl_* {
		generate `party'_per = (`party'/Numeroelettori)*100
	}

	* Keep the identifiers and the percentage variables used later on.
	keep comune_id year turnout ///
		m5stelle_S_per m5stelle_C_per ///
		pd_S_per pd_C_per ///
		pdl_C_per pdl_S_per ///
		lega_C_per lega_S_per

* reshape the data in wide format: 
	/*
	REMEMBER: we analyze a cross section (referendum result)
	Thus, we do not need a tscs data but a single cs!
	*/
	* One row per comune; every stub gets a 2013 and a 2018 column.
	reshape wide m5stelle_S_per m5stelle_C_per pd_S_per pd_C_per pdl_C_per pdl_S_per lega_C_per lega_S_per turnout, i(comune_id) j(year)

* rename all variables into meaningful names and label them:
	* Target pattern is <party>_<c|s>_<yy>. The three lists below are
	* parallel: old stub, new stub, party name used in the label.
	local oldstubs m5stelle pd pdl lega
	local newstubs m5s pd pdl lega
	local parties M5S PD PdL Lega

	forvalues p = 1/4 {
		local old : word `p' of `oldstubs'
		local new : word `p' of `newstubs'
		local party : word `p' of `parties'
		foreach yr in 2013 2018 {
			local yy = substr("`yr'", 3, 2)
			foreach ch in C S {
				local lc = lower("`ch'")
				rename `old'_`ch'_per`yr' `new'_`lc'_`yy'
				label variable `new'_`lc'_`yy' "`party': \% votes `yr'"
			}
		}
	}

	* Same treatment for turnout.
	foreach yr in 2013 2018 {
		local yy = substr("`yr'", 3, 2)
		rename turnout`yr' turnout_`yy'
		label variable turnout_`yy' "\% turnout `yr'"
	}

* save into temporary file (merged into the final data further below):
	tempfile KB_itcomuneelection
	save `KB_itcomuneelection'

*******************************
******population 2016 data*****
*******************************

/*
Here we open the population files retrieved from the ministry. 
Do some cleanings before merge:
*/
	* Register the area file as a project input and read it.
	project, original("$data_original/comune info/area.csv")

	import delimited "$data_original/comune info/area.csv", clear

	* Give the comune key the name used by every other file.
	rename pro_com comune_id

	* Area of the comune: shape_area divided by 1,000,000.
	* NOTE(review): presumably a conversion from square metres to square
	* kilometres - confirm the unit of shape_area. Population density itself
	* is only computed after the merges further below.
	generate comune_area_2016 = shape_area/1000000

	* Only the key and the area are needed.
	keep comune_id comune_area_2016

	* Store temporarily until the merge.
	tempfile KB_comune_2016
	save `KB_comune_2016'

*******************************
******population 2001 data*****
*******************************

	**some covariates on comunes:
	* Register the 2009 list of comuni as a project input and read the
	* sheet holding the comune records (first row = variable names).
	project, original("$data_original/comune info/Elenco_comuni_italiani_1_gennaio_2009.xls")

	import excel "$data_original/comune info/Elenco_comuni_italiani_1_gennaio_2009.xls", sheet("COMUNI 01_01_2009") firstrow clear 

	* Column E is used as the comune key.
	rename E comune_id

	* Copy the covariates into short names; the originals are dropped by
	* the -keep- below.
	generate population_2001 = Popolazionelegale200121102
	generate comune_coast = Comunelitoraneo
	generate comune_mount = ComuneMontano
	generate comune_area = Superficieterritorialetotale
	generate comune_elev = Altitudinedelcentrometri
	* 2001 population divided by total surface area
	generate comune_density = Popolazionelegale200121102/Superficieterritorialetotale

	* comune_* also matches comune_id, so the key is retained either way.
	keep comune_id population_2001 comune_*

	* Store temporarily until the merge.
	tempfile cov_2001
	save `cov_2001' 


*******************************
******m5s meetup data**********
*******************************
/*
Here we open the m5s meetup data we webscraped.
Do some cleanings before merge:
*/
	* Register the scraped event file as a project input and read it.
	project, original("$data_coded/events_comune_neigh_nga.csv")

	import delimited "$data_coded/events_comune_neigh_nga.csv", clear 

* data reports missings if 0, so replace: 
	foreach cnt of varlist n_total-hist_wn_late {
		replace `cnt' = 0 if `cnt'==.
	}

* Create a variable measuring all M5S events AFTER referendum (placebo): 
	* Turn the text date (read as year-month-day) into a Stata daily date.
	rename first_event h1
	generate first_event = date(h1, "YMD")
	format first_event %td
	drop h1

	* Cut-off date 4 December 2016, defined only where a first event exists.
	* Because postdate is missing whenever first_event is missing, the
	* comparison below evaluates to 0 for comuni without any event.
	generate postdate = mdy(12,04,2016) if first_event!=.

	generate placebo = (first_event>postdate)

	generate placebo_treat = placebo*n_posttreat
	generate placebo_wn_treat = placebo*wn_posttreat

* some date cleanings:
	* round to one decimal, then code "no value" as 0
	replace hist_days = round(hist_days, 0.1)
	replace hist_days = 0 if hist_days==.

* check if duplicates exist: 
	duplicates report comune_id
	// 7998 unique comune

* save as temporary file: 
	tempfile meetup
	save `meetup'

*******************************
******Campante Internet data***
*******************************
/*
Our instrumental variable approach is based on: 
Campante, Filipe, Ruben Durante, and Francesco Sobbrio. 2018. “Politics 2.0: The Multifaceted Effect
of Broadband Internet on Political Participation.” Journal of the European Economic Association 16
(4): 1094–1136.

They shared their data with us, which we then merge here with our data.
*/
	* Register the data shared by Campante et al. as a project input.
	project, original("$data_original/Campanteetal/ugs_politics_2.0.dta")

	use "$data_original/Campanteetal/ugs_politics_2.0.dta", clear 

	* Rename the comune key to match ours and give the distance variable
	* (distance_closest_SGU) the name used in the rest of this file.
	rename (istat_code_110 distance_closest_SGU) (comune_id km_to_ugs)

	* Store temporarily until the merge.
	tempfile Campanteetal
	save `Campanteetal'

*******************************
******Schaub/Morisi Internet data***
*******************************
/*
Our instrumental variable approach is based on: 
Schaub, Max, and Davide Morisi. 2020. “Voter Mobilisation in the Echo Chamber: Broadband Internet
and the Rise of Populism in Europe.” European Journal of Political Research

Replication file downloaded:
https://ejpr.onlinelibrary.wiley.com/action/downloadSupplement?doi=10.1111%2F1475-6765.12373&file=ejpr12373-sup-0002-Replicationdata.zip
*/
	* Register the Schaub/Morisi replication data as a project input.
	project, original("$data_original/SchaubMorisi/Italy/italy_broadband.dta")

	* The temporary file that will hold the prepared copy until the merge.
	tempfile SchaubMorisi

	use "$data_original/SchaubMorisi/Italy/italy_broadband.dta", clear

	* The comune ID variable has to carry the same name as ours.
	rename id_comunenew2 comune_id

	save `SchaubMorisi'

******************************************************
********USING REGIONAL DATA AND MERGE WITH M5S********
******************************************************
	* Register the referendum results as a project input and load them;
	* this file is the master data for all merges below.
	project, original("$data_original/referendum results/referendum.dta")

	use "$data_original/referendum results/referendum.dta", clear 

	* Prepare the merge key and discard rows that have none.
	rename istatmunicipalitycode comune_id
	drop if comune_id==.

	* M5S event data: rows found only in the referendum file are discarded,
	* rows found only in the event file are kept.
	merge 1:1 comune_id using `meetup', keep(match using) nogenerate

	* 2016 area information: same rule as above.
	* NOTE(review): comuni that appear only in the area file are kept here,
	* whereas the 2001 merge below discards using-only rows - confirm that
	* this asymmetry is intended.
	merge 1:1 comune_id using `KB_comune_2016', keep(match using) nogenerate

	* 2001 population infos: rows found only in the 2001 file are discarded.
	merge 1:1 comune_id using `cov_2001', keep(master match) nogenerate

* one comune is missing!
* we code the information from infos from the ministry of interior:
* https://elezionistorico.interno.gov.it/index.php?tpel=F&dtel=04/12/2016&tpa=I&tpe=C&lev0=0&levsut0=0&lev1=3&levsut1=1&lev2=15&levsut2=2&levsut3=3&ne1=3&ne2=15&es0=S&es1=S&es2=S&es3=N&ms=S&ne3=151790&lev3=1790
* NOTE(review): nvoters (1085) is larger than nconstituents (1076), so the
* turnout computed below exceeds 100 for this comune; 1076 also equals
* yes + no (458 + 618). Please re-check nconstituents against the source.
	replace nvoters=1085 if comune_id==17189
	replace nconstituents=1076 if comune_id==17189
	replace referendumyes=458 if comune_id==17189
	replace referendumno=618 if comune_id==17189

*we clean the regional level controls:
	**referendum voting:
	* Referendum outcomes in percent: turnout relative to nconstituents,
	* yes/no shares relative to nvoters.
	generate turnout = (nvoters/nconstituents)*100
	generate referendum_yes = (referendumyes/nvoters)*100
	generate referendum_no = (referendumno/nvoters)*100
	drop referendumyes referendumno

	* Foreigners as a percentage of the comune population.
	generate foreigners = (foreigners_total/comune_population)*100
	
	* Several count variables arrive as text with ".." as a placeholder;
	* turn the placeholder into "." and convert the variables to numeric.
	foreach raw of varlist university literate_no_formal_educ_65 literate_no_formal_educ unemployment {
		replace `raw' = "." if `raw'==".."
		destring `raw', replace
	}

	* University: replace the count by its percentage of the population.
	rename university h1 
	generate university = (h1/comune_population)*100
	drop h1 
	
	* Low education: the two "literate, no formal education" counts plus
	* primary school, as a percentage of the population.
	generate no_edu = ((literate_no_formal_educ_65+literate_no_formal_educ+primary_school)/comune_population)*100
	
	* Income per capita and logged income.
	* NOTE(review): log_income is the log of total taxableincome, not of the
	* per-capita income variable - confirm that this is intended.
	generate income = taxableincome/comune_population
	generate log_income = log(taxableincome)
	
	* Logged population.
	generate log_pop = log(comune_population)
	
	**urbanity from pop: 
	* Six size classes (0-5) based on comune_population.
	* Stata treats a missing value as larger than any number, so an
	* unguarded ">=250000" would put comuni with missing population into
	* the top class. Such comuni now get a missing urbanity instead.
	gen urbanity=0 if !missing(comune_population)
	replace urbanity=1 if comune_population>=5000 & comune_population<10000
	replace urbanity=2 if comune_population>=10000 & comune_population<30000
	replace urbanity=3 if comune_population>=30000 & comune_population<100000
	replace urbanity=4 if comune_population>=100000 & comune_population<250000
	replace urbanity=5 if comune_population>=250000 & !missing(comune_population)
	
	**encoding of regional identifiers:
	* Rebuild the numeric region identifier from the region name.
	drop region_id
	encode region, gen(region_id)

	** some province identifiers are missing.
	** Thus, we drop the variable and rebuild it after cleaning by hand:
	drop province_id

	**some recodes needed in order to get province for each comune (done by hand):
	* comuni assigned to "Medio Campidano"
	foreach town in "Arbus" "Barumini" "Collinas" "Furtei" "Genuri" "Gesturi" ///
			"Gonnosfanadiga" "Guspini" "Las Plassas" "Lunamatrona" ///
			"Pabillonis" "Pauli Arbarei" "Samassi" "San Gavino Monreale" ///
			"Sanluri" "Sardara" "Segariu" "Serramanna" "Serrenti" "Setzu" ///
			"Siddi" "Tuili" "Turri" "Ussaramanna" "Villacidro" "Villamar" ///
			"Villanovaforru" "Villanovafranca" {
		replace province = "Medio Campidano" if comune=="`town'"
	}

	* comuni assigned to "Carbonia-Iglesias"
	foreach town in "Buggerru" "Calasetta" "Carbonia" "Carloforte" ///
			"Domusnovas" "Fluminimaggiore" "Giba" "Gonnesa" "Iglesias" ///
			"Masainas" "Musei" "Narcao" "Nuxis" "Perdaxius" "Piscinas" ///
			"Portoscuso" "San Giovanni Suergiu" "Santadi" "Tratalias" ///
			"Villamassargia" "Villaperuccio" {
		replace province = "Carbonia-Iglesias" if comune=="`town'"
	}
	* names containing an apostrophe are written out rather than passed
	* through a macro
	replace province = "Carbonia-Iglesias" if comune=="Sant'Anna Arresi"
	replace province = "Carbonia-Iglesias" if comune=="Sant'Antioco"
	
	**encode it: 
	encode province, gen(province_id)

	** single ID for each case:
	* Replace the existing id by a running row number.
	drop id 
	generate id = _n

	*we standardize the exposure variables as discussed in the paper:
	* exposure_pop is the number of voters in thousands; each std_ variable
	* is the natural log of (count per 1,000 voters + 1).
	generate exposure_pop = nvoters/1000

	foreach ev of varlist placebo_* n_total-hist_wn_late {
		generate std_`ev' = ln((`ev'/exposure_pop)+1)
	}

	**creating a binary exposure indicator: 
	* All indicators below are 1/0 flags derived from an event count.
	* Stata treats a missing value as larger than any number, so "x>0" or
	* "x>=1" is TRUE when x is missing. Rows without event information
	* would therefore be flagged as exposed; each indicator is now left
	* missing when its underlying count is missing.

	* any M5S event at all
	gen m5s_ever = (n_total>=1) if !missing(n_total)
	
	**creating dummy for early M5S:
	gen early_dummy = (hist_n_early>0) if !missing(hist_n_early)
	
	**creating a binary exposure indicator for M5S during referendum: 
	gen m5s_ref_ever = (n_treat_campaign>0) if !missing(n_treat_campaign)
	
	**creating a binary exposure indicator for M5S during short referendum period: 
	gen m5s_ref_ever_short = (n_treat_campaign_short>0) if !missing(n_treat_campaign_short)
	
	
	**creating a binary exposure indicator for adjacent M5S during referendum: 
	gen m5s_adjacent_ever = (n_neigh_treat_campaign>0) if !missing(n_neigh_treat_campaign)
	
* Now we merge this final data with the cleaned data we prepared above:
	
	* cross section election results: 
	merge 1:1 comune_id using `KB_itcomuneelection'
	drop if _merge==2
	drop _merge

	* replication data from Campanteetal:
	* The master data hold one row per comune_id here (the 1:1 merge above
	* would have failed otherwise), so the merge is declared 1:m instead of
	* m:m. With a unique master both give the same result, but m:m matches
	* observations by their position within a group and is documented by
	* Stata as something to avoid.
	merge 1:m comune_id using `Campanteetal'
	drop if _merge==2
	drop _merge
	* Stop right here if matched duplicates in the using data multiplied
	* rows; the next 1:1 merge requires a unique comune_id anyway.
	isid comune_id, missok

	* replication data from Schaub and Morisi (for instrument validation):
	merge 1:1 comune_id using `SchaubMorisi'
	drop if _merge==2
	drop _merge

	* clean internet: 
	* Transform every existing adsl* variable to (1 - x)*100.
	* NOTE(review): presumably the source stores the share WITHOUT
	* broadband on a 0-1 scale - confirm against the source codebook.
	foreach bb of varlist adsl* {
		replace `bb' = (1-`bb')*100
	}
	* adsl18 is created after the loop, so it is not transformed.
	* NOTE(review): nga_coverage is labelled "(0-1)" further below while
	* adsl18 is labelled as a percentage - confirm the intended scale.
	generate adsl18 = nga_coverage
	
	foreach yy in 12 13 14 15 18 {
		label variable adsl`yy' "\% population with broadband internet in 20`yy'"
	}
	
	* produce log of days + km to UGS as outlined in paper: 
	generate log_hist = ln(hist_days+1)
	generate log_ugs = ln(km_to_ugs+1)

	*population density: 
	* population divided by area, for 2016 and for 2001
	generate pop_density_16 = comune_population/comune_area_2016
	generate pop_density_01 = population_2001/comune_area
	
	**labeling variables 
	* --- geography, population and timing ---
	label variable pop_density_16 "population density 2016"
	label variable comune_area_2016 "area 2016"
	label variable population_2001 "Population in 2001"
	label variable hist_days "days since formation"
	label variable log_hist "log days since formation"
	label variable log_ugs "log distance to closest UGS (in km)"

	* --- outcome ---
	label variable referendum_no "\% No in referendum"

	* --- M5S activity measures ---
	label variable std_wn_treat_campaign "M5S activity (cont.)"
	label variable std_wn_neigh_treat_campaign "M5S adjacent activity (cont.)"
	label variable std_n_treat_campaign "M5S: referendum {it:(events only)}"
	label variable std_hist_wn_early "M5S: phase I"
	label variable std_hist_wn_mid "M5S: phase II"
	label variable std_hist_n_early "M5S: phase I {it:(events only)}"
	label variable std_hist_n_mid "M5S: phase II {it:(events only)}"

	* --- urbanity: variable label plus value labels for classes 0-5 ---
	label variable urbanity "urbanity"
	label define urbanity 0 "<5 000" 1 "<10 000" 2 "<30 000" 3 "<100 000" 4 "<250 000" 5 "$\geq$ 250 000"
	label values urbanity urbanity

	* --- M5S event counts by time window ---
	label variable std_n_total "M5S: since 2005"
	label variable std_n_treat_24m "M5S: 24 months before referendum"
	label variable std_n_treat_12m "M5S: 12 months before referendum"
	label variable std_n_treat_6m "M5S: 6 months before referendum"
	label variable std_n_treat_90d "M5S: 3 months before referendum"
	label variable std_n_treat_60d "M5S: 2 months before referendum"
	label variable std_n_treat_30d "M5S: 1 month before referendum"

	* --- socio-economic controls ---
	label variable foreigners "\% foreigners"
	label variable log_pop "population (logged)"
	label variable no_edu "\% low education"
	label variable university "\% university degree"
	label variable unemployment "\% unemployed"
	label variable log_income "income (logged)"
	label variable income "income per cap"

	* --- internet coverage ---
	label variable nga_coverage "internet: coverage (0-1)"
	label variable nga_dummy "internet: coverage (0,1)"

	* --- placebo, binary and further activity measures ---
	label variable std_placebo_treat "M5S: placebo"
	label variable m5s_ref_ever "M5S activity (0,1)"
	label variable m5s_adjacent_ever "M5S adjacent: binary"
	label variable m5s_ref_ever_short "M5S activity (0,1)"
	label variable std_wn_treat_campaign_short "M5S activity (cont.)"
	label variable std_wn_posttreat "M5S post referendum"
	label variable std_wn_treat_loc_indoor "M5S indoor"
	label variable std_wn_treat_loc_outdoor "M5S outdoor"

	**final cleanings and ordering       
	* identifiers and the main outcomes go to the front of the data set
	order id comune comune_id province* region* turnout referendum_yes referendum_no
	
	
	**save and be done:
	save "$data_coded/placebased_regional.dta", replace 

	*codebookout "$data_coded/codebookplacebased_regional", replace
	
	* tell -project- that this do-file creates the saved data set
	project, creates("$data_coded/placebased_regional.dta")
	